********Clean and compile data needed for analysis from IFLS5********

*Work from the IFLS5 raw-data folder: every file opened by bare name below is resolved against it
cd "$ifls5rawdata"

***Clean Book 3A, which contains individuals' demographics, risk preference questions, education, work, religion
*Starts from the Book 3A cover file and attaches selected variables from each module.
*Every merge is 1:1 on household id (hhid14) + person id (pid14); no rows are dropped here,
*and each merge leaves its own indicator variable (_merge<module>) for later inspection.
use b3a_cov.dta, clear

*Attentiveness questions: prefix cp1-cp3 with the book name
*(presumably so they stay distinct from same-named variables in other books merged later - confirm)
rename cp1 b3acp1
rename cp2 b3acp2
rename cp3 b3acp3

*Merge highest level of education
merge 1:1 hhid14 pid14 using b3a_dl1.dta, generate(_mergedl1) keepusing(dl06)

*Merge risk questions (all variables whose names start with si)
merge 1:1 hhid14 pid14 using b3a_si.dta, generate(_mergesi) keepusing(si*)

*Work information
*NOTE(review): the original list named tk03 twice; the repeat is removed here. If a different
*variable was intended in its place, add it to the list.
merge 1:1 hhid14 pid14 using b3a_tk1.dta, generate(_mergetk1) keepusing(tk01 tk02 tk03 tk04 tk16d)
merge 1:1 hhid14 pid14 using b3a_tk2.dta, generate(_mergetk2) keepusing(tk19ab tk24a)

*Religion and religiosity
merge 1:1 hhid14 pid14 using b3a_tr.dta, generate(_mergetr) keepusing(tr06 tr07 tr11 tr12)

*Subjective well-being
merge 1:1 hhid14 pid14 using b3a_sw.dta, generate(_mergesw) keepusing(sw04 sw05)

*Park the cleaned Book 3A data in a temporary file for the final merge
tempfile book3a_clean14
save "`book3a_clean14'", replace

***Clean Book 3B, which contains individuals' smoking behavior
*Cover file is the master; smoking items are attached 1:1 on hhid14 + pid14.
*Unmatched rows from either side are retained and flagged in _mergekm.
use "b3b_cov.dta", clear

*Smoking behavior: every km01* and km08* variable plus km04
merge 1:1 hhid14 pid14 using "b3b_km.dta", generate(_mergekm) keepusing(km01* km04 km08*)

*Store for the individual-level merge further down
tempfile book3b_clean14
save "`book3b_clean14'", replace

***Clean Book EK, which contains individuals' cognitive measures

*Easier test (ek_ek1.dta)
*Builds three cumulative scores; each item adds one point when the recorded
*answer string equals the answer key for that item.
use ek_ek1.dta, clear

*Keep ones with results
keep if result == 1

*Answer key for items ek1-ek12: the i-th word is the correct answer to item i
local ravkey "E F A D C B E B C B C E"

*Full score over all twelve items
gen score1 = 0
la var score1 "cum. score Rav SPM1"
forvalues i = 1/12 {
	local ans : word `i' of `ravkey'
	replace score1 = score1 + 1 if ek`i' == "`ans'"
}

*Alternative score restricted to items 1-6, 11 and 12 (the items also scored in ek_ek2)
gen score1_a = 0
la var score1_a "alt cum. score Rav SPM1 (same as score 2 in ek2)"
foreach i of numlist 1/6 11 12 {
	local ans : word `i' of `ravkey'
	replace score1_a = score1_a + 1 if ek`i' == "`ans'"
}

*Math score over items ek13-ek17; the j-th word of the key belongs to item 12 + j
local mathkey "B C C B C"
gen mscore1 = 0
la var mscore1 "cum. math score test 1"
forvalues j = 1/5 {
	local item = 12 + `j'
	local ans : word `j' of `mathkey'
	replace mscore1 = mscore1 + 1 if ek`item' == "`ans'"
}

*Only the identifiers and the three scores are carried forward
keep hhid14 pid14 score1 score1_a mscore1

tempfile cogmeasures1_clean14
save "`cogmeasures1_clean14'", replace

*Harder test (ek_ek2.dta)
*Scores the second test, combines it with the first test's scores, and keeps a
*single score per person (rav_score).
use ek_ek2.dta, clear

*Keep ones with results
keep if result == 1

*Scored items and their answers, position by position
local ravitems 1 2 3 4 5 6 11 12
local ravkey   E F A D C B C E
local nrav : word count `ravitems'

gen score2 = 0
la var score2 "cum. score Rav SPM2 (same as score1_a)"
forvalues k = 1/`nrav' {
	local item : word `k' of `ravitems'
	local ans  : word `k' of `ravkey'
	replace score2 = score2 + 1 if ek`item' == "`ans'"
}

*Math items ek18-ek22; the j-th word of the key belongs to item 17 + j
local mathkey "B D C D B"
gen mscore2 = 0
la var mscore2 "cum. math score test 2"
forvalues j = 1/5 {
	local item = 17 + `j'
	local ans : word `j' of `mathkey'
	replace mscore2 = mscore2 + 1 if ek`item' == "`ans'"
}

keep hhid14 pid14 score2 mscore2

*Bring in the easier-test scores; people present in only one file are kept
merge 1:1 hhid14 pid14 using "`cogmeasures1_clean14'", generate(_mergecog)

*Row total treats missing scores as zero; rows whose scores sum to zero are removed
egen ct = rowtotal(score2 mscore2 score1 score1_a mscore1)
keep if ct != 0

*If have two scores for same thing in same year, go with the score on the "harder" test first
*dum flags people with both scores and dist is the absolute gap between them;
*both are diagnostics only and are not kept in the saved file
gen dum = (score2 != . & score1_a != .)
gen dist = abs(score2 - score1_a) if dum == 1

*score2 when available, otherwise score1_a (missing when neither exists)
gen rav_score = cond(score2 != ., score2, score1_a)
la var rav_score "Cum score Raven's SPM (0-8)"

keep hhid14 pid14 rav_score

tempfile bookek_clean14
save "`bookek_clean14'", replace

***Clean Book K, which contains HH roster and can be used to construct HH size
*Household-level section: the cover file is merged 1:1 on hhid14 with bk_ar0.dta,
*so both files must hold one row per household.
*", clear" added so this section also runs when the data in memory has unsaved changes
*(e.g. when the section is executed on its own), matching the other -use- calls in this file.
use bk_cov.dta, clear

*Number of people; note hhsize not created here, but will create later
*All variables from bk_ar0.dta are brought in; the match status is stored in _mergear0
merge 1:1 hhid14 using bk_ar0.dta, generate(_mergear0)

*Store for the household-level merge further down
tempfile bookk_clean14
save "`bookk_clean14'", replace

***Clean Book 2, which contains HH's experience of events/natural disasters, and crop information
*Household-level section: every merge is 1:1 on hhid14, unmatched rows are retained,
*and each merge leaves its own indicator variable.
*", clear" added so this section also runs when the data in memory has unsaved changes
*(e.g. when the section is executed on its own), matching the other -use- calls in this file.
use b2_cov.dta, clear

*Experiences of natural disasters, etc
merge 1:1 hhid14 using b2_nd1.dta, generate(_mergend1) keepusing(nd01 nd02)

*Crop information (ut00a and every ut07* variable)
merge 1:1 hhid14 using b2_ut1.dta, generate(_mergeut1) keepusing(ut00a ut07*)

*Store for the household-level merge further down
tempfile book2_clean14
save "`book2_clean14'", replace

***HH income, assets, etc
*Runs the separate cleaning script clean_ifls5_balsheet.do from the programs folder and
*saves whatever dataset that script leaves in memory to a temporary file.
*NOTE(review): the script is not visible here. Its output is later used as the using side
*of an m:1 merge on hhid14, so it must end with one row per hhid14 in memory - confirm.
*NOTE(review): the cd below moves Stata away from $ifls5rawdata, and ptrack.dta is opened
*by bare file name further down. That only works if the called script changes the working
*directory back to the raw-data folder before it finishes - confirm.
cd "$reploc/programs"
do clean_ifls5_balsheet.do
tempfile hhbalsheet14full
save "`hhbalsheet14full'", replace

***Merge all books/information

*Individual
*Book 3A is the master; Book 3B and the cognitive score are joined 1:1 on hhid14 + pid14.
*Rows found on only one side are retained, with match status in _mergeb3b and _mergeek.
use "`book3a_clean14'", clear
merge 1:1 hhid14 pid14 using "`book3b_clean14'", generate(_mergeb3b)
merge 1:1 hhid14 pid14 using "`bookek_clean14'", generate(_mergeek)

*Store the person-level file for the final merge
tempfile individualdata14
save "`individualdata14'", replace

*Household
*Book K is the master; Book 2 and the balance sheet are joined on hhid14 (m:1, so each
*using file must be unique on hhid14).
use "`bookk_clean14'", clear
merge m:1 hhid14 using "`book2_clean14'", generate(_mergeb2)
merge m:1 hhid14 using "`hhbalsheet14full'", generate(_mergebalsheet)

*Only households that matched the balance sheet are kept
keep if _mergebalsheet == 3

*Store the household-level file for the final merge
tempfile hhdata14
save "`hhdata14'"


*Merge individual and HH

*Merge tracking info
*Load the tracking file and keep only pidlink values that occur exactly once.
*dup is 0 for a unique pidlink and 1, 2, ... within a repeated one, so every row of a
*repeated pidlink is removed (not just the extra copies). dup stays in the saved file.
use ptrack.dta, clear
quietly bysort pidlink: gen dup = cond(_N==1,0,_n)
keep if dup == 0

tempfile ptrack_clean14
save "`ptrack_clean14'", replace

*Start from the person-level file
use "`individualdata14'", clear

*Drop duplicates
*dup is 0 for a unique pidlink and 1, 2, ... within a repeated one; all rows of a
*repeated pidlink are removed
quietly bysort pidlink: gen dup = cond(_N==1,0,_n)
keep if dup == 0

*Attach tracking info 1:1 on pidlink and keep matched people only
merge 1:1 pidlink using "`ptrack_clean14'", generate(_mergeptrack)
keep if _mergeptrack == 3
drop _mergeptrack

*Attach household data to each person and keep matched rows only
merge m:1 hhid14 using "`hhdata14'", generate(_mergehh)
keep if _mergehh == 3

*Prefix every variable with IFLS5_ to mark it as an IFLS5 variable, except for pidlink, which is unique individual identifier across waves
unab everything : _all
local idvar pidlink
local toprefix : list everything - idvar
foreach v of local toprefix {
	rename `v' IFLS5_`v'
}

*Wave indicator (created after the renaming, so it carries no prefix)
generate ifls = 5

*Create hhsize for IFLS5, which did not have it for some reason
*Defined as the largest pid14 found within each hhid14 among the rows still in the data
egen IFLS5_hhsize = max(IFLS5_pid14), by(IFLS5_hhid14)
